

###########################################################
##### Haiti elite network project  		          			#####
##### network visualization	                      		#####
##### 2021 mar 03                   									#####
###########################################################

#####
## check naming conventions
#####

## Share of individuals whose surname equals their father's surname, and
## whose surname equals their first spouse's surname (everyone, then only
## rows with sexe == "FEMME").
ind_tab <- read.csv('01_Data/02_Clean/gene_clean.csv')

ind_tab <- subset(ind_tab, select = c(no, name, last, sexe, pere, mere, spouse_1, spouse_2))

## look up each father by id and attach his name columns.
## The call used to pass `keep = all.x`; merge() has no `keep` argument, so it
## was swallowed by `...` and the join silently ran as an inner join, dropping
## every individual whose father is not in the table. `all.x = TRUE` keeps them.
pere_tab <- ind_tab %>%
  select(no_pere = no, name_pere = name, last_pere = last)
ind_tab <- merge(ind_tab, pere_tab, by.x = 'pere', by.y = 'no_pere', all.x = TRUE)
is_femme <- ind_tab$sexe == "FEMME"
## table() drops the NA comparisons, so unmatched rows do not enter the shares
prop.table(table(ind_tab$last_pere == ind_tab$last))
prop.table(table(ind_tab$last_pere[is_femme] == ind_tab$last[is_femme]))

## same lookup for the first spouse (again a left join, see above)
spouse_tab <- ind_tab %>%
  select(no_spouse_1 = no, name_spouse_1 = name, last_spouse_1 = last)
ind_tab <- merge(ind_tab, spouse_tab, by.x = 'spouse_1', by.y = 'no_spouse_1', all.x = TRUE)
is_femme <- ind_tab$sexe == "FEMME"
prop.table(table(ind_tab$last_spouse_1 == ind_tab$last))
prop.table(table(ind_tab$last_spouse_1[is_femme] == ind_tab$last[is_femme]))



#####
## text - number of marriages per family
#####

## load the two family-level tables used throughout the rest of the script
fam <- read.csv("01_Data/02_Clean/fam.csv")
all <- read.csv("01_Data/02_Clean/allfams.csv")

## number of distinct families in each table
length(unique(all$fam))
length(unique(fam$fam))

## average nind and degree_all_uw, ignoring missing values: `all` first ...
mean(all$nind, na.rm = TRUE)
mean(all$degree_all_uw, na.rm = TRUE)

## ... then `fam`
mean(fam$nind, na.rm = TRUE)
mean(fam$degree_all_uw, na.rm = TRUE)



#####
## TABLE A1: Stats of top 25 families
#####

## summarize info on top fams in importer dataset:
## keep the descriptive columns and rank families by bonw_02_wnind_st,
## highest first
tab <- fam %>%
  select(fam, nind, degree_all_uw, bonw_02_wnind_st, biz, mil, pol, coup, immig, syrian, top_hs2) %>%
  arrange(desc(bonw_02_wnind_st))
## keep the 25 highest-ranked rows. head() instead of tab[1:25, ]: with fewer
## than 25 families the index form pads the table with all-NA rows, and the
## row-name assignment below then fails on the missing names.
tab <- head(tab, 25)
## family names become the row labels; the column itself is then dropped
rownames(tab) <- tab$fam
tab$fam <- NULL
xtable(tab)



#####
## FIGURE 1a-b: network grouped by fastgreedy cluster
#####

## family network as stored on disk (GraphML)
fam_graph <- read_graph('01_Data/02_Clean/fam_graph.graphml', format = 'graphml')

# give everyone positive centrality for size:
# shift bonw_02_wnind_st so that its minimum maps to 0.01
all$bonw_02_wnind_size <- all$bonw_02_wnind_st - min(all$bonw_02_wnind_st, na.rm = TRUE) + 0.01
all <- subset(all, select = c(fam, degree_all_uw, bonw_02_wnind_size, fastgreedy))


## merge centrality into graph object:
## vertex attributes -> data frame, left-join the family table on the vertex
## name, then rebuild the undirected graph from the edge list plus the
## enriched vertex table.
## Uses the current igraph names instead of the deprecated read.graph /
## get.data.frame / graph.data.frame aliases; as_data_frame is namespaced
## because dplyr exports a function with the same name.
fam_att <- as.data.frame(vertex_attr(fam_graph))
fam_att <- merge(fam_att, all, by.x = 'name', by.y = 'fam', all.x = TRUE)
fam_mat <- igraph::as_data_frame(fam_graph, what = 'edges')
fam_graph <- graph_from_data_frame(fam_mat, vertices = fam_att, directed = FALSE)


## build the two analysis subgraphs: vertices with biz == 1 (business elite),
## and vertices with any of biz, pol or mil equal to 1 (all elite)
in_biz <- V(fam_graph)$biz == 1
in_any_elite <- in_biz | V(fam_graph)$pol == 1 | V(fam_graph)$mil == 1
biz_graph <- induced_subgraph(fam_graph, which(in_biz))
all_graph <- induced_subgraph(fam_graph, which(in_any_elite))


## 
## business elite
##


## Edge weights for a layout that groups vertices by community.
##   community      : igraph communities object for `network`
##   network        : the graph whose edges are weighted
##   weight.within  : weight given to edges that stay inside one community
##   weight.between : weight given to edges that cross between communities
## Returns one weight per edge of `network`.
edge.weights <- function(community, network, weight.within = 50, weight.between = .001) {
  crosses <- igraph::crossing(communities = community, graph = network)
  ifelse(crosses, weight.between, weight.within)
}

## cluster object built from the stored fastgreedy membership; edge.weights()
## then gives within-cluster edges a large weight and cross-cluster edges a
## tiny one
fastgreedy <- make_clusters(biz_graph, membership = V(biz_graph)$fastgreedy)
E(biz_graph)$fg_weight <- edge.weights(fastgreedy, biz_graph)

## layout_with_fr() starts from random positions, so seed the RNG before it;
## previously the only set.seed() came after the layout and the figure changed
## on every run
set.seed(1984)
LO <- layout_with_fr(biz_graph, weights = E(biz_graph)$fg_weight, grid = 'nogrid')

## color nodes by cluster: one translucent rainbow color per cluster, shuffled.
## Re-seeding right before sample() keeps the shuffled palette identical to
## what the script produced when this was its only set.seed() call.
n_clusters <- length(unique(V(biz_graph)$fastgreedy))
clrs <- rainbow(n_clusters, alpha = 0.4)
set.seed(1984)
clrs <- sample(clrs, n_clusters)
V(biz_graph)$color <- clrs[as.numeric(as.factor(V(biz_graph)$fastgreedy))]
## legend rows: unique() keeps first-appearance order in both columns, so
## cluster ids and colors stay paired (as long as every cluster has its own
## color); the rows are then sorted by cluster id
legend_cats <- data.frame(attr = unique(V(biz_graph)$"fastgreedy"),
                          color = unique(V(biz_graph)$color))
legend_cats <- legend_cats[order(legend_cats$attr),]

## every edge gets the same semi-transparent grey
E(biz_graph)$color <- rgb(0.5, 0.5, 0.5, .5)

## create coup legend (same colors as the vertex frames in the plot below)
legend2_cats <- data.frame(attr = c('Participant', 'Non-Participant'), color = c('red', 'black'))


pdf('03_Figures/biz_network_fg.pdf')
plot(biz_graph,
     layout = LO,
     edge.color = E(biz_graph)$color,
     vertex.color = V(biz_graph)$color,
     # red frame marks vertices with coup == 1
     vertex.frame.color = ifelse(V(biz_graph)$coup == 1, 'red', 'black'),
     vertex.label = "",
     vertex.size = 5)
legend(x = -1.45, y = -1.05, legend = legend_cats$attr, col = legend_cats$color, pch = 19, ncol = 8, title = "Neighborhood")
legend(x = .65, y = -1.05, legend = legend2_cats$attr, col = legend2_cats$color, pch = 1, title = "Coup")
dev.off()



## 
## all elite
##

## set edge weights for grouped layout: cluster object from the stored
## fastgreedy membership, heavy within-cluster and tiny cross-cluster weights
fastgreedy <- make_clusters(all_graph, membership = V(all_graph)$fastgreedy)
E(all_graph)$fg_weight <- edge.weights(fastgreedy, all_graph, weight.within = 50, weight.between = .001)

## layout_with_fr() starts from random positions, so seed the RNG before it;
## previously the only set.seed() came after the layout and the figure changed
## on every run
set.seed(1278)
LO <- layout_with_fr(all_graph, weights = E(all_graph)$fg_weight, grid = 'nogrid')

## color nodes by cluster: one translucent rainbow color per cluster, shuffled.
## Re-seeding right before sample() keeps the shuffled palette identical to
## what the script produced when this was its only set.seed() call.
n_clusters <- length(unique(V(all_graph)$fastgreedy))
clrs <- rainbow(n_clusters, alpha = 0.4)
set.seed(1278)
clrs <- sample(clrs, n_clusters)
V(all_graph)$color <- clrs[as.numeric(as.factor(V(all_graph)$fastgreedy))]
## legend rows: unique() keeps first-appearance order in both columns, so
## cluster ids and colors stay paired (as long as every cluster has its own
## color); the rows are then sorted by cluster id
legend_cats <- data.frame(attr = unique(V(all_graph)$"fastgreedy"),
                          color = unique(V(all_graph)$color))
legend_cats <- legend_cats[order(legend_cats$attr),]

## every edge gets the same semi-transparent grey
E(all_graph)$color <- rgb(0.5, 0.5, 0.5, 0.5)

## create coup legend (same colors as the vertex frames in the plot below)
legend2_cats <- data.frame(attr = c('Participant', 'Non-Participant'), color = c('red', 'black'))


pdf('03_Figures/all_network_fg.pdf')
plot(all_graph,
     layout = LO,
     edge.color = E(all_graph)$color,
     vertex.color = V(all_graph)$color,
     # red frame marks vertices with coup == 1
     vertex.frame.color = ifelse(V(all_graph)$coup == 1, 'red', 'black'),
     vertex.label = "",
     vertex.size = 5)
legend(x = -1.55, y = -1.01, legend = legend_cats$attr, col = legend_cats$color, pch = 19, ncol = 9, title = "Neighborhood")
legend(x = .75, y = -1.01, legend = legend2_cats$attr, col = legend2_cats$color, pch = 1, title = "Coup")
dev.off()



#####
## FIGURE 1c-d: histograms of centrality
#####

## reload the family tables (`all` was trimmed to a few columns further up)
fam <- read.csv('01_Data/02_Clean/fam.csv')
all <- read.csv('01_Data/02_Clean/allfams.csv')

## kernel density of log(bonw_02_wnind) in `fam`, rows with coup == 1 in red
## and rows with coup == 0 in grey
bw <- 0.5

pdf('03_Figures/hist_cent_biz_log.pdf')
plot(density(log(na.omit(fam$bonw_02_wnind[fam$coup == 1])), bw = bw), col = 'red',
     ylim = c(0,0.35),
     main = "Distribution of log centrality of coup plotters (red)
and non-plotters (grey)",
     xlab = paste0('Bandwidth = ',bw))
lines(density(log(na.omit(fam$bonw_02_wnind[fam$coup == 0])), bw = bw), col = 'grey')
dev.off()

## same figure for `all`.
## The original selected rows of `all` with fam$coup, i.e. with a row mask
## taken from a different data frame, so the mask did not line up with the
## values being plotted. Use a coup indicator aligned to the rows of `all`.
## NOTE(review): assumes allfams.csv carries its own `coup` column; if it does
## not, coup is looked up in `fam` by family name (families absent from `fam`
## become NA and drop out of both curves) -- confirm which is intended.
all_coup <- if ('coup' %in% names(all)) all$coup else fam$coup[match(all$fam, fam$fam)]

bw <- 0.75

pdf('03_Figures/hist_cent_all_log.pdf')
plot(density(log(na.omit(all$bonw_02_wnind[all_coup == 1])), bw = bw), col = 'red',
     ylim = c(0,0.2),
     main = "Distribution of log centrality of coup plotters (red)
     and non-plotters (grey)",
     xlab = paste0('Bandwidth = ',bw))
lines(density(log(na.omit(all$bonw_02_wnind[all_coup == 0])), bw = bw), col = 'grey')
dev.off()






#####
## FIGURE A3: network of top 25 fams and direct connections
#####

## subgraph of the families listed in `tab` (its row names) and the edges
## among them
## NOTE(review): only the listed families are kept, not their other
## neighbours -- confirm this matches the "direct connections" in the title.
top_fams <- rownames(tab)
## `vids` spelled out; the original `v =` only worked via partial matching
top_graph <- induced_subgraph(fam_graph, vids = which(V(fam_graph)$name %in% top_fams))

pdf('03_Figures/top_fams.pdf')
plot(top_graph,
     # red = vertices with coup == 1
     vertex.color = ifelse(V(top_graph)$coup == 1, 'red', 'grey'),
     # node size scales with the shifted (strictly positive) centrality
     vertex.size = V(top_graph)$bonw_02_wnind_size * 10,
     vertex.label.dist = 2)
dev.off()



#####
## Figure A5 a-b: good and bad reachability examples
#####

## graph loaded from gene_ps_all.graphml; its vertices carry `family` and
## `cohort` attributes, both used below
ps_graph <- read_graph('01_Data/02_Clean/gene_ps_all.graphml', format = 'graphml')

## keep only the CORVINGTON vertices by deleting every vertex of another
## family (single V()[...] index; the nested V()[V()[...]] selected the same set)
reach_graph <- delete_vertices(ps_graph, V(ps_graph)[family != "CORVINGTON"])
reach_graph <- as.undirected(reach_graph)
## vertex size grows with cohort; vertices without a cohort get size 3
V(reach_graph)$size <- (V(reach_graph)$cohort - 1600) / 60
V(reach_graph)$size <- ifelse(is.na(V(reach_graph)$size), 3, V(reach_graph)$size)

pdf('03_Figures/fam_reach_hi.pdf')
## vertex.label was previously passed twice ("" and NA); one empty label is
## enough to suppress the labels
plot(reach_graph,
     layout = layout_with_fr,
     vertex.label = "",
     edge.arrow.size = 0.1,
     edge.color = 'black',
     edge.width = 4,
     vertex.size = V(reach_graph)$size)
#      title(main = "High reachability family", cex.main = 1.5)
dev.off()



## keep only the DUPUY vertices by deleting every vertex of another family
## (single V()[...] index; the nested V()[V()[...]] selected the same set)
reach_graph <- delete_vertices(ps_graph, V(ps_graph)[family != "DUPUY"])
reach_graph <- as.undirected(reach_graph)
## vertex size grows with cohort; vertices without a cohort get size 3
V(reach_graph)$size <- (V(reach_graph)$cohort - 1600) / 60
V(reach_graph)$size <- ifelse(is.na(V(reach_graph)$size), 3, V(reach_graph)$size)

pdf('03_Figures/fam_reach_lo.pdf')
## vertex.label was previously passed twice ("" and NA); one empty label is
## enough to suppress the labels
plot(reach_graph,
     layout = layout_with_fr,
     vertex.label = "",
     edge.arrow.size = 0.1,
     edge.color = 'black',
     edge.width = 4,
     vertex.size = V(reach_graph)$size)
# title(main = "Low reachability family", cex.main = 1.5)
dev.off()


#####
## FIGURE A5c-d: histograms of reachability
#####

## reload the family tables
fam <- read.csv('01_Data/02_Clean/fam.csv')
all <- read.csv('01_Data/02_Clean/allfams.csv')

## kernel density of log(reachability) over all rows of `fam`.
## The title used to be copied from the centrality figure ("coup plotters
## (red) and non-plotters (grey)") although a single curve of reachability is
## drawn; it now describes what is plotted.
## NOTE(review): if a split by coup status was intended here, add the second
## curve instead -- confirm.
bw <- 0.5

pdf('03_Figures/hist_reach_imp.pdf')
plot(density(log(na.omit(fam$reachability)), bw = bw), col = 'red',
     ylim = c(0,0.35),
     main = "Distribution of log reachability",
     xlab = paste0('Bandwidth = ',bw))
dev.off()

## same figure for `all`, with a wider bandwidth
bw <- 0.75

pdf('03_Figures/hist_reach_all.pdf')
plot(density(log(na.omit(all$reachability)), bw = bw), col = 'red',
     ylim = c(0,0.2),
     main = "Distribution of log reachability",
     xlab = paste0('Bandwidth = ',bw))
dev.off()




